Tidy Tuesday Series

2018 Week 32 - US Wind Farm Locations

lruolin
06-18-2021

Load Packages

Load Data from tidytuesdayR package

# Download the Tidy Tuesday datasets for 2018, week 32
tt_data <- tt_load(2018, week = 32)


# Show the readme that comes with the download
readme(tt_data)

# Store the table under `us_wind`, the name every later chunk refers to
us_wind <- tt_data$us_wind

Explore Dataset

# Column-by-column overview of the data: types and leading values
us_wind %>%
  glimpse()
Rows: 58,185
Columns: 24
$ case_id    <dbl> 3073429, 3071522, 3073425, 3071569, 3005252, 3003…
$ faa_ors    <chr> "missing", "missing", "missing", "missing", "miss…
$ faa_asn    <chr> "missing", "missing", "missing", "missing", "miss…
$ usgs_pr_id <dbl> 4960, 4997, 4957, 5023, 5768, 5836, 4948, 5828, 4…
$ t_state    <chr> "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "…
$ t_county   <chr> "Kern County", "Kern County", "Kern County", "Ker…
$ t_fips     <chr> "06029", "06029", "06029", "06029", "06029", "060…
$ p_name     <chr> "251 Wind", "251 Wind", "251 Wind", "251 Wind", "…
$ p_year     <dbl> 1987, 1987, 1987, 1987, 1987, 1987, 1987, 1987, 1…
$ p_tnum     <dbl> 194, 194, 194, 194, 194, 194, 194, 194, 194, 194,…
$ p_cap      <dbl> 18.43, 18.43, 18.43, 18.43, 18.43, 18.43, 18.43, …
$ t_manu     <chr> "Vestas", "Vestas", "Vestas", "Vestas", "Vestas",…
$ t_model    <chr> "missing", "missing", "missing", "missing", "miss…
$ t_cap      <dbl> 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 9…
$ t_hh       <dbl> -9999, -9999, -9999, -9999, -9999, -9999, -9999, …
$ t_rd       <dbl> -9999, -9999, -9999, -9999, -9999, -9999, -9999, …
$ t_rsa      <dbl> -9999, -9999, -9999, -9999, -9999, -9999, -9999, …
$ t_ttlh     <dbl> -9999, -9999, -9999, -9999, -9999, -9999, -9999, …
$ t_conf_atr <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2…
$ t_conf_loc <dbl> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3…
$ t_img_date <chr> "1/1/2012", "1/1/2012", "1/1/2012", "7/31/2016", …
$ t_img_srce <chr> "NAIP", "NAIP", "NAIP", "Digital Globe", "Digital…
$ xlong      <dbl> -118.3607, -118.3612, -118.3604, -118.3640, -118.…
$ ylat       <dbl> 35.08378, 35.08151, 35.08471, 35.07942, 35.08559,…

Count state

# Turbines per state, largest count first
us_wind %>%
  count(t_state, sort = TRUE)
# A tibble: 45 x 2
   t_state     n
   <chr>   <int>
 1 TX      13232
 2 CA       9037
 3 IA       4280
 4 OK       3821
 5 KS       2898
 6 IL       2602
 7 MN       2547
 8 CO       2278
 9 OR       1868
10 WA       1744
# … with 35 more rows

Count project names

# Turbines per project name, largest count first
us_wind %>%
  count(p_name, sort = TRUE)
# A tibble: 1,479 x 2
   p_name                                     n
   <chr>                                  <int>
 1 unknown Tehachapi Wind Resource Area 1  1831
 2 Green Ridge Power                        516
 3 Stateline Wind Project                   440
 4 Mesa Wind Farm                           432
 5 Sky River                                335
 6 Cedar Creek                              274
 7 Peetz Table                              267
 8 Flat Ridge 2                             261
 9 Rolling Hills                            259
10 Woodward Mountain I & II                 242
# … with 1,469 more rows

Plot longitude and latitude

# Scatter of turbine coordinates over US state outlines.
# Rows whose xlong is 100 or more are dropped as outliers before plotting.
ggplot(
  data = filter(us_wind, xlong < 100),
  mapping = aes(x = xlong, y = ylat)
) +
  geom_point() +
  borders("state") +
  coord_map() +
  theme_void()

Distribution of wind turbines in US States

# Tally of turbines per state, built from t_state as a factor
count_states <- fct_count(factor(us_wind$t_state))

# Map of turbine locations, leaving out Alaska, Hawaii, Guam and Puerto Rico
ggplot(
  data = filter(us_wind, !(t_state %in% c("AK", "HI", "GU", "PR"))),
  mapping = aes(x = xlong, y = ylat)
) +
  geom_point() +
  borders("state") +
  coord_map() +
  labs(
    title = "Distribution of wind turbines in US",
    subtitle = "Most wind turbines are situated along middle of US.",
    caption = "Source: USGS.gov"
  ) +
  theme_void()

Projects

# Keep an untouched copy of the data as loaded
us_wind_raw <- us_wind

# Drop AK, HI, GU and PR, then turn the -9999 placeholder into NA.
# na_if() is applied column by column: calling it on the whole data frame
# errors under dplyr >= 1.1.0, where `y` is cast to the type of `x`.
us_wind_processed <- us_wind %>%
  filter(!t_state %in% c("AK", "HI", "GU", "PR")) %>%
  mutate(across(where(is.numeric), ~ na_if(.x, -9999)))

# One row per project/state pair: turbine count plus the mean and standard
# deviation of the turbine coordinates.
wind_projects <- summarise(
  group_by(us_wind_processed, p_name, t_state),
  turbines = n(),
  long = mean(xlong),
  lat = mean(ylat),
  long_sd = sd(xlong),
  lat_sd = sd(ylat)
)

# Project locations; point size and colour are both mapped to turbine count.
# The size mapping is inherited from ggplot(), so geom_point() needs no aes().
# NOTE(review): the subtitle names 251 Wind as the biggest project, but the
# project-name count earlier in this post lists far larger ones - confirm.
wind_projects %>%
  ggplot(aes(long, lat, col = turbines, size = turbines)) +
  geom_point(show.legend = TRUE) +
  scale_color_continuous(type = "viridis") +
  borders("state") +
  coord_map() +
  labs(
    title = "Distribution of projects in US",
    subtitle = "The biggest project is 251 Wind, in California",
    caption = "Source: usgs.gov"
  ) +
  theme_void()

To find out which project is the biggest:

# Turbines per project/state pair. No sort = TRUE is given, so rows come back
# ordered by p_name; the first row is not necessarily the largest project.
count(us_wind_processed, p_name, t_state)
# A tibble: 1,440 x 3
   p_name                      t_state     n
   <chr>                       <chr>   <int>
 1 251 Wind                    CA        190
 2 30 MW Iowa DG Portfolio     IA         10
 3 6th Space Warning Squadron  MA          2
 4 Adair                       IA         76
 5 Adams                       IA         64
 6 Adams Wind Generations, LLC MN         12
 7 AFCEE MMR Turbines          MA          2
 8 AG Land 1                   IA          1
 9 AG Land 2                   IA          1
10 AG Land 3                   IA          1
# … with 1,430 more rows

Year

# Per-project summary, now with the first recorded year and summed capacity
wind_projects <- us_wind_processed %>%
  group_by(p_name, t_state) %>%
  summarise(
    year = min(p_year, na.rm = TRUE), # first year project started
    turbines = n(),
    total_capacity_kw = sum(t_cap, na.rm = TRUE),
    lon = mean(xlong),
    lat = mean(ylat),
    lon_sd = sd(xlong),
    lat_sd = sd(ylat)
  ) %>%
  ungroup() # drop the leftover p_name grouping, as the later summaries do

# Histogram of the first year of each project
ggplot(data = wind_projects, mapping = aes(x = year)) +
  geom_histogram(fill = "deepskyblue4") +
  labs(
    title = "Distribution of projects by year",
    subtitle = "Wind Turbine Projects gained momentum after 2000",
    caption = "Source: usgs.gov"
  ) +
  theme_clean()

# Project locations; point size shows turbine count, colour shows first year.
# The size mapping is inherited from ggplot(), so geom_point() needs no aes().
wind_projects %>%
  ggplot(aes(lon, lat, size = turbines, col = year)) +
  geom_point(show.legend = TRUE) +
  scale_color_continuous(type = "viridis") +
  borders("state") +
  coord_map() +
  labs(
    title = "Age and Scale of US Wind Turbine Projects",
    subtitle = "251 Wind in CA is the oldest project, and the newer projects are situated along middle of the country",
    caption = "Source: usgs.gov"
  ) +
  theme_void()

Capacity

# Number of distinct p_cap values recorded under each project name
us_wind_processed %>%
  distinct(p_name, p_cap) %>% # capacity
  count(p_name, sort = TRUE)
# A tibble: 1,425 x 2
   p_name                        n
   <chr>                     <int>
 1 McNeilus                      5
 2 Bishop Hill I                 3
 3 Blue Summit                   3
 4 Capricorn Ridge               3
 5 Capricorn Ridge expansion     3
 6 Case Western University       3
 7 Century Expansion             3
 8 Crossroads                    3
 9 Crow Lake                     3
10 Horse Hollow II               3
# … with 1,415 more rows
# Print the per-project summary: first year, turbine count, summed turbine
# capacity, and the mean / standard deviation of the turbine coordinates.
us_wind_processed %>%
  group_by(p_name, t_state) %>%
  summarise(
    year = min(p_year, na.rm = TRUE), # first year project started
    turbines = n(),
    total_capacity_kw = sum(t_cap, na.rm = TRUE),
    lon = mean(xlong),
    lat = mean(ylat),
    lon_sd = sd(xlong),
    lat_sd = sd(ylat)
  ) %>%
  ungroup()
# A tibble: 1,440 x 9
   p_name         t_state  year turbines total_capacity_…    lon   lat
   <chr>          <chr>   <dbl>    <int>            <dbl>  <dbl> <dbl>
 1 251 Wind       CA       1987      190            18050 -118.   35.1
 2 30 MW Iowa DG… IA       2017       10            30000  -93.4  42.0
 3 6th Space War… MA       2013        2             3360  -70.5  41.8
 4 Adair          IA       2008       76           174800  -94.7  41.5
 5 Adams          IA       2016       64           154284  -94.7  40.9
 6 Adams Wind Ge… MN       2011       12            20040  -94.7  44.9
 7 AFCEE MMR Tur… MA       2011        2             3000  -70.5  41.8
 8 AG Land 1      IA       2012        1             1600  -93.3  42.2
 9 AG Land 2      IA       2012        1             1600  -93.4  42.1
10 AG Land 3      IA       2012        1             1600  -93.4  42.1
# … with 1,430 more rows, and 2 more variables: lon_sd <dbl>,
#   lat_sd <dbl>

How has turbine capacity changed over time?

# Per-project summary used for the capacity-over-time plot.
# sum(t_cap) deliberately has no na.rm here: if any turbine in a project has
# an NA t_cap, that project's total is NA rather than an undercounted sum.
turbine <- us_wind_processed %>%
  group_by(p_name, t_state) %>%
  summarise(
    year = min(p_year, na.rm = TRUE), # first year project started
    turbines = n(),
    total_capacity_kw = sum(t_cap),
    lon = mean(xlong),
    lat = mean(ylat),
    lon_sd = sd(xlong),
    lat_sd = sd(ylat)
  ) %>%
  ungroup()


# Capacity per turbine (project total divided by turbine count) against the
# project's first year, with a linear-model trend line.
ggplot(
  data = turbine,
  mapping = aes(x = year, y = total_capacity_kw / turbines)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Change in Total Capacity per Turbine over Time",
    subtitle = "Total Capacity per Turbine increased over time",
    caption = "Source: usgs.gov"
  ) +
  theme_few()

Turbine models

# One row per turbine model: median specifications plus how many turbines and
# projects use the model, ordered by number of projects.
# na.rm = TRUE is required because -9999 was converted to NA upstream; without
# it a single missing measurement makes the whole model's median NA.
turbine_models <- us_wind_processed %>%
  group_by(t_model) %>%
  summarize(
    t_cap = median(t_cap, na.rm = TRUE), # turbine capacity (kW)
    t_hh = median(t_hh, na.rm = TRUE), # turbine hub height (m)
    t_rd = median(t_rd, na.rm = TRUE), # turbine rotor diameter (m)
    # rotor swept area (m2); output column is named t_rsw, source is t_rsa
    t_rsw = median(t_rsa, na.rm = TRUE),
    t_ttlh = median(t_ttlh, na.rm = TRUE), # turbine total height calculated (m)
    turbines = n(), # number of turbines
    projects = n_distinct(p_name) # number of projects
  ) %>%
  arrange(desc(projects))

# Median total height against median capacity, one point per turbine model
ggplot(data = turbine_models, mapping = aes(x = t_ttlh, y = t_cap)) +
  geom_point() +
  labs(
    title = "Relationship between turbine height and capacity",
    subtitle = "Taller Turbines have higher capacity",
    x = "Turbine Total Height Calculated (m)",
    y = "Turbine Capacity (kW)"
  ) +
  theme_clean()

Learning points:

References

https://www.youtube.com/watch?v=O1oDIQV6VKU&list=PL19ev-r1GBwkuyiwnxoHTRC8TTqP8OEi8&index=78

Citation

For attribution, please cite this work as

lruolin (2021, June 18). pRactice corner: Tidy Tuesday Series. Retrieved from https://lruolin.github.io/myBlog/posts/20210619_Tidytuesday wind data/

BibTeX citation

@misc{lruolin2021tidy,
  author = {lruolin, },
  title = {pRactice corner: Tidy Tuesday Series},
  url = {https://lruolin.github.io/myBlog/posts/20210619_Tidytuesday wind data/},
  year = {2021}
}